* Figure2.do  07/25/22
* Rx share compared to opioid levels by region, year, and demographic group

do Figure2prep  // Note that 2021 pop is copied from 2020
keep if ucd=="Drug" & (mcd=="Opioid" | mcd=="T400T401T404" | mcd=="T402T403") & (age=="0-44" | age=="45-64" | age=="65+")
encode mcdtitle, gen(mcdnum)
reshape groups mcdnum 1 2 3
reshape cons age state gender year pop
reshape var deaths
reshape wide
ren deaths1 deathsall  // will less than Rx + Im due to poly drug use
ren deaths2 deathsim
ren deaths3 deathsrx
gen polysh = (deathsim + deathsrx)/deathsall - 1
gen deathrtall = deathsall*100000/pop
gen imsh = deathsim/(deathsim+deathsrx)
gen rxsh = 1 - imsh
gen lndeathrtall = log(deathrtall)

gen byte female = gender==`"{"Female"}"'
encode age, gen(agenum)
encode state, gen(censusdiv)
set more off

* changes over time
sort female agenum year
egen cumdeaths = sum(deathsall), by(female agenum censusdiv)
gen lnxregpool=.
gen lnxregpoolse=.
gen lnxregpooln=.
gen xregpooln=.
foreach yr of numlist 1999/2021 {
  reg lndeathrtall rxsh i.female##i.agenum if year==`yr' [aw=cumdeaths]
  replace lnxregpool = _b[rxsh] if year==`yr'
  replace lnxregpoolse = _se[rxsh] if year==`yr'
  replace lnxregpooln = e(N) if year==`yr'
  reg deathrtall rxsh i.female##i.agenum if year==`yr' [aw=cumdeaths]  // for calculating how many obs, if any, lost from log specification
  replace xregpooln = e(N) if year==`yr'
}
gen upperpoolln = lnxregpool + (1.96*lnxregpoolse)
gen lowerpoolln = lnxregpool - (1.96*lnxregpoolse)
gen obslosttoln = xregpooln - lnxregpooln
set more on

gen byte zeros = 0

#delimit ;
twoway (rcap lowerpoolln upperpoolln year if female==1 & agenum==1, lcolor(gray))
  (scatter lnxregpool year if female==1 & agenum==1, msymbol(S) mcolor(black))
  (line lnxregpool year if female==1 & agenum==1, lcolor(black))
  (line zeros year if female==1 & agenum==1, lcolor(gray) lpattern(dash)),
  title("Figure 2.  The cross-area relation between opioid death" "rates and their Rx-Im composition, by year")
  legend(order(2 "Point estimate" 1 "95% confidence interval") region(lcolor(white)) size(small))
  graphregion(color(white)) ylabel(,nogrid) xtitle("") ytitle("Cross-area regression coefficient")
  note("Notes: Observations are age group by gender by Census division.  Each year, log opioid deaths per 100K is"
  "regressed on gender-age group interactions and the Rx share of opioid deaths, weighting by total opioid deaths 1999-2021."
  "The horizontal axis is shown as a dashed line." "Source: CDC Wonder.", size(vsmall));
#delimit cr

list cumdeaths year if lndeathrtall==.
tabstat cumdeaths if lndeathrtall!=., stat(sum) format(%8.0fc) by(year)
